/*--------------------------------------------------------------------------------
	DESCRIPTION: Cleaning the Swiss Household Panel (SHP) longitudinal dataset

--------------------------------------------------------------------------------*/

* Load the person-level long file, keeping only rows with status equal to 0.
use if status==0 using "$data_dir/raw/shplong_p_user.dta", clear

* Attach imputed personal income on person-year.
* NOTE(review): no keep() option is given here, so person-years that exist
* only in the imputed-income file are retained (with status missing).
* Confirm this is intended.
merge 1:1 idpers year using "$data_dir/raw/imputed_income_pers_long_shp.dta", nogen

* The equivalent file is keyed on x11101ll, so the person id is renamed for
* the merge and restored afterwards. Rows found only in the using file are
* discarded.
rename idpers x11101ll
merge 1:1 x11101ll year using "$data_dir/raw/shpequiv_long.dta", keep(master match) nogen
rename x11101ll idpers

* Replace the labelled numeric canton variable by its label text and keep
* the first two characters of that text as a separate code.
decode l11101, gen(canton)
drop l11101
gen canton_code = substr(canton, 1, 2)

rename year survey_year
gen ccode = "CHE"

**********************************
*************TRUST VALUES*********
**********************************

rename pp04 trust_fed_govt
rename pp45 trust_others

* For each trust item: set negative values to missing, then build a 0/1
* indicator that equals 1 when the response is at or above the median of
* that item within the same survey year, and 0 when it is below.
tempname yr_median
foreach item in trust_fed_govt trust_others {

	replace `item' = . if `item'<0

	gen `item'_med = .

	forvalues yr = 1999/2023 {
		* capture keeps the loop running whatever summarize returns
		capture summarize `item' if survey_year==`yr', detail
		scalar `yr_median' = r(p50)
		replace `item'_med = (`item'>=`yr_median') if `item'!=. & survey_year==`yr'
	}
}

********************************
***********OTHER VALUES*********
********************************

rename pn57 cheat_taxes
rename pp01 interest_politics

* Same treatment as the trust items: negatives become missing, then a 0/1
* indicator marks responses at or above the within-year median.
tempname yr_median
foreach item in interest_politics {

	replace `item' = . if `item'<0

	gen `item'_med = .

	forvalues yr = 1999/2023 {
		capture summarize `item' if survey_year==`yr', detail
		scalar `yr_median' = r(p50)
		replace `item'_med = (`item'>=`yr_median') if `item'!=. & survey_year==`yr'
	}
}

* cheat_taxes is split by hand because its median is 0:
* 1 for any value above zero, 0 for exactly zero, missing otherwise.
gen cheat_taxes_med = (cheat_taxes>0) if cheat_taxes>=0 & cheat_taxes!=.

************************************************
*************INDIVIDUAL CHARACTERISTICS*********
************************************************

* Year of birth is derived from the survey year and reported age.
gen yob = survey_year-age

* Named generation, assigned from year-of-birth ranges (bounds inclusive).
* Years of birth outside 1883-2012 keep an empty string.
gen generation = ""
replace generation = "lost_gen"     if inrange(yob, 1883, 1900)
replace generation = "greatest_gen" if inrange(yob, 1901, 1927)
replace generation = "silent_gen"   if inrange(yob, 1928, 1945)
replace generation = "baby_boomer"  if inrange(yob, 1946, 1964)
replace generation = "gen_x"        if inrange(yob, 1965, 1980)
replace generation = "millenial"    if inrange(yob, 1981, 1996)
replace generation = "gen_z"        if inrange(yob, 1997, 2012)

* Ten-year birth cohort label such as 1950s, for decades 1870 to 2010.
gen cohort = ""
forvalues decade = 1870(10)2010 {
	replace cohort = "`decade's" if yob<(`decade'+10) & yob>=`decade'
}

*edu (grouped from isced)
*			1   Primary      (isced 10, 20)
*			2   Secondary    (isced 31, 32, 33)
*			3   Tertiary     (isced 41, 51, 52, 60)
*			4   Other        (isced 0)
*		  -99   Missing      (isced -6, -3, -2, -1)
* Any other isced value, including system missing, is left as missing.

gen edu = cond(inlist(isced, 10, 20), 1, ///
	cond(inlist(isced, 31, 32, 33), 2, ///
	cond(inlist(isced, 41, 51, 52, 60), 3, ///
	cond(isced==0, 4, ///
	cond(inlist(isced, -6, -3, -2, -1), -99, .)))))

*occupation (copied from is1maj)
*			0  		Armed forces
*			1  		Legislators, senior officials, managers
*			2  		Professionals
*			3  		Technicians and associate professionals
*			4  		Clerks
*			5  		Service workers, market sales workers
*			6  		Skilled agricultural and fishery workers
*			7  		Craft and related trades workers
*			8  		Plant and machine operators, assemblers
*			9  		Elementary occupations
*	      -99		Missing (any negative is1maj code)
* A system-missing is1maj stays missing.

gen occupation = cond(is1maj<0, -99, is1maj)

* The two-digit variable is kept under a descriptive name.
rename is2maj isco88_2digit

*employment_status (from wstat)
*			1   Employed
*			2   Unemployed
*			3   Other
*		  -99   Missing
* Negative wstat codes and missing wstat are both mapped to -99.
* Previously only system-missing wstat received -99, so negative codes were
* silently left as missing instead of the documented -99.
* Any other wstat value is left as missing.

gen employment_status = wstat if inlist(wstat, 1, 2, 3)
replace employment_status = -99 if wstat<0 | missing(wstat)

*income (decile of iptotni, computed separately within each survey year)
*			1   Band 1
*			2   Band 2
*			3   Band 3
*			4   Band 4
*			5   Band 5
*			6   Band 6
*			7   Band 7
*			8   Band 8
*			9   Band 9
*		   10   Band 10
*
*income_quintile (same construction with five bands)
*			1   Band 1
*			2   Band 2
*			3   Band 3
*			4   Band 4
*			5   Band 5
*
* Observations without iptotni stay system missing in both variables; no
* -99 code is assigned here.

gen income = .
gen income_quintile = .
forvalues yr = 1999/2023 {
	tempvar decile_bin quintile_bin

	xtile `decile_bin' = iptotni if survey_year==`yr' & iptotni!=., nq(10)
	replace income = `decile_bin' if survey_year==`yr'

	xtile `quintile_bin' = iptotni if survey_year==`yr' & iptotni!=., nq(5)
	replace income_quintile = `quintile_bin' if survey_year==`yr'

	drop `decile_bin' `quintile_bin'
}

* Flag copied from impptotn when it is 0 or 1; missing otherwise.
gen imputed_income = impptotn if inlist(impptotn, 0, 1)


*real income

* The CPI file is keyed on year, so survey_year is renamed for the merge and
* restored afterwards. Years found only in the CPI file are discarded.
rename survey_year year
merge m:1 year using "$data_dir/raw/swiss_cpi.dta", keep(master match) nogen
rename year survey_year

* Rescale by the 2011 value of cpi_1939 to convert income to real francs in 2011.
tempname cpi_2011
summarize cpi_1939 if survey_year==2011
scalar `cpi_2011' = r(mean)
gen real_income = iptotni * `cpi_2011' / cpi_1939
* 0.01 is added before taking the log.
gen ln_real_income = ln(real_income+0.01)


*real income growth

* First difference of log real income within person across survey years.
xtset idpers survey_year
gen income_growth = D.ln_real_income


*marital (from civsta)
*			1   Married      (civsta 2)
*			2   Not Married  (civsta 1, 3, 4, 5, 6, 7, -1, -2)
*		  -99   Missing      (everything else, including system missing)
* NOTE(review): civsta codes -1 and -2 are counted as Not Married rather than
* Missing. They look like non-response codes - confirm this is intended.

cap drop marital
gen marital = cond(civsta==2, 1, ///
	cond(inlist(civsta, 1, 3, 4, 5, 6, 7, -1, -2), 2, -99))


*ideology (copied from pp10)
*			0   Left
*			1
*			2
*			3
*			4
*			5
*			6
*			7
*			8
*			9
*			10   Right
*		   -99   Missing (any negative pp10 code)
* A system-missing pp10 stays missing.

gen ideology = cond(pp10<0, -99, pp10)


*party affiliated (from pn43)
*			1    Member (active or passive)   (pn43 1 or 2)
*			2    Not a member                 (pn43 3)
*		   -99   Missing                      (any negative pn43 code)
* The -99 recode has to test pn43 itself. The earlier version tested
* party_member, which can only be 1, 2 or missing at that point, so the
* recode never fired and negative codes were left as system missing.
* A system-missing pn43 stays missing, as for ideology and language.

gen party_member = 1 if inlist(pn43, 1, 2)
replace party_member = 2 if pn43==3
replace party_member = -99 if pn43<0


*lived in switzerland (from hab_ch compared with year of birth)
*			1	Since birth       (hab_ch is -5, or equals yob)
*			0	Not since birth   (positive hab_ch different from yob)
*		  -99	Missing           (hab_ch -1, -2, -3)
* NOTE(review): -5 is presumably the since-birth code - confirm in the codebook.
*
* Stata treats missing as larger than any number, so a missing hab_ch would
* pass the test hab_ch>0 and be coded 0, and a missing hab_ch would equal a
* missing yob and be coded 1. Both cases are excluded explicitly, which
* leaves live_switz missing when hab_ch is missing.
* A valid hab_ch with a missing yob is still coded 0, as before.

gen live_switz = 1 if hab_ch==-5 | (hab_ch==yob & !missing(hab_ch))
replace live_switz = 0 if hab_ch>0 & !missing(hab_ch) & hab_ch!=yob
replace live_switz = -99 if inlist(hab_ch, -1, -2, -3)

*language (from plingu)
*			1	French
*			2	German
*			3	Italian
*		  -99	Missing (any negative plingu code)
* Any other plingu value, including system missing, is left as missing.

gen language = cond(plingu<0, -99, plingu) if inlist(plingu, 1, 2, 3) | plingu<0

* Variables retained in the cleaned file.
local final_vars ///
	idpers idhous canton canton_code survey_year sex age ccode ///
	trust_fed_govt_med trust_others_med cheat_taxes_med interest_politics_med ///
	yob cohort generation edu occupation employment_status ///
	income income_growth ln_real_income real_income ///
	ideology live_switz marital language ///
	wicss w11101 w11102 w11113 w11105 ///
	imputed_income party_member income_quintile isco88_2digit
keep `final_vars'

* Variables moved to the front of the dataset, in this order.
local lead_vars ///
	ccode idpers idhous canton_code canton survey_year yob age cohort sex ///
	edu occupation employment_status marital ///
	income income_quintile real_income ln_real_income income_growth ///
	ideology live_switz
order `lead_vars'

* Declare the person-year panel structure before saving.
xtset idpers survey_year

save "$data_dir/clean/shp_final_clean.dta", replace
